Pairwise alignment refers to comparison between two sequences, where as Multiple Sequence Alignment refers to comparing more than two sequences.
In the exercises below we cover how we can do pairwise alignment using Biostrings package in Bioconductor.
Install Packages Biostrings
Answers to the exercises are available here.
Exercise 1
Create two DNA strings and do pairwise alignment using local, global and overlap alignment techniques and print the score.
#################### # # # Exercise 1 # # # #################### library(Biostrings) DNA1 <-DNAString("ATAGTAGATGCGGCGCGCTAGAG") DNA2 <-DNAString("ATGTCGTCGTAGTCGTGTGTCAC") matrix <- nucleotideSubstitutionMatrix(match = 1, mismatch = -3, baseOnly = TRUE) gAlign <-pairwiseAlignment(DNA1, DNA2, substitutionMatrix = matrix, gapOpening = 3, gapExtension = 1) lAlign <-pairwiseAlignment(DNA1, DNA2, type = "local", substitutionMatrix = matrix, gapOpening = 3, gapExtension = 1) oAlign <-pairwiseAlignment(DNA1, DNA2, type = "overlap", substitutionMatrix = matrix, gapOpening = 3, gapExtension = 1) print(lAlign)
## Local PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [4] GTAG ## subject: [9] GTAG ## score: 4
print(gAlign)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] ATAGTAGATGCGGCGCGCTAG---------AG ## subject: [1] AT-GT-----CGTCG---TAGTCGTGTGTCAC ## score: -24
print(oAlign)
## Overlap PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] "" ## subject: [1] "" ## score: 0
Exercise 2
Create two DNA strings and do pairwise alignment and write the alignment to an .aln file.
#################### # # # Exercise 2 # # # #################### library(Biostrings) DNA1 <-DNAString("ATAGTAGATGCGGCGCGCTAGAG") DNA2 <-DNAString("ATGTCGTCGTAGTCGTGTGTCAC") matrix <- nucleotideSubstitutionMatrix(match = 1, mismatch = -3) gAlign <-pairwiseAlignment(DNA1, DNA2, substitutionMatrix = matrix, gapOpening = 3, gapExtension = 1) print("Written to File, Available in your default R directory")
## [1] "Written to File, Available in your default R directory"
writePairwiseAlignments(gAlign,file="Alignment.aln")
Exercise 3
Create two Amino acid strings and do pairwise alignment
#################### # # # Exercise 3 # # # #################### library(Biostrings) Amino1 <- AAString("MALWMRLLPAAAFLALWGPD") Amino2 <- AAString("MALWMRLLPLLALLALWGPD") pAlign<-pairwiseAlignment(Amino1,Amino2) print(pAlign)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] MALWMRLLPAAAFLALWGPD ## subject: [1] MALWMRLLPLLALLALWGPD ## score: 54.44525
Exercise 4
Create two Amino acid strings and do pairwise alignment using BLOSUM62 substitution matrix.
#################### # # # Exercise 4 # # # #################### library(Biostrings) Amino1 <- AAString("MALWMRLLPAAAFLALWGPD") Amino2 <- AAString("MALWMRLLPLLALLALWGPD") pAlign<-pairwiseAlignment(Amino1,Amino2,substitutionMatrix = "BLOSUM62", gapOpening = 3, gapExtension = 1) print(pAlign)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] MALWMRLLPAAAFLALWGPD ## subject: [1] MALWMRLLPLLALLALWGPD ## score: 93
Exercise 5
Create two Amino acid strings and do pairwise alignment using BLOSUM100 substitution matrix
#################### # # # Exercise 5 # # # #################### library(Biostrings) Amino1 <- AAString("MALWMRLLPAAAFLALWGPD") Amino2 <- AAString("MALWMRLLPLLALLALWGPD") pAlign<-pairwiseAlignment(Amino1,Amino2,substitutionMatrix = "BLOSUM100", gapOpening = 3, gapExtension = 1) print(pAlign)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] MALWMRLLPAAAFLALWGPD ## subject: [1] MALWMRLLPLLALLALWGPD ## score: 167
Exercise 6
Create two Amino acid strings and do pairwise alignment using PAM250 substitution matrix
#################### # # # Exercise 6 # # # #################### library(Biostrings) Amino1 <- AAString("MALWMRLLPAAAFLALWGPD") Amino2 <- AAString("MALWMRLLPLLALLALWGPD") pAlign<-pairwiseAlignment(Amino1,Amino2,substitutionMatrix = "PAM250", gapOpening = 3, gapExtension = 1) print(pAlign)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] MALWMRLLPAAAFLALWGPD ## subject: [1] MALWMRLLPLLALLALWGPD ## score: 107
Exercise 7
Compare between BLOSUM62 substitution matrix of R and that of the NCBI Database using any two amino acid of your choice.
#################### # # # Exercise 7 # # # #################### library(Biostrings) data(BLOSUM62) rvalues<-BLOSUM62["B", "D"] ncbifile <- "ftp://ftp.ncbi.nih.gov/blast/matrices/BLOSUM62" blosum <- as.matrix(read.table(ncbifile)) ncbivalues<-blosum["B", "D"] print(rvalues)
## [1] 4
print(ncbivalues)
## [1] 4
Exercise 8
Do pairwise alignment using Needlemann Wunch Alignment algorithm and print the score, suppress any warnings.
#################### # # # Exercise 8 # # # #################### options(warn=-1) library(Biostrings) Amino1 <- AAString("MALWMRLLPAAAFLALWGPD") Amino2 <- AAString("MALWMRLLPLLALLALWGPD") align<-needwunsQS(Amino1,Amino2, substmat = "BLOSUM62") print(align)
## Global PairwiseAlignments (1 of 1) ## pattern: [1] MALWMRLLPAAAFLALWGPD ## subject: [1] MALWMRLLPLLALLALWGPD ## score: 93
Exercise 9
Create two DNA Strings and translate the same to amino acids and do pairwise alignment between the amino acid sequences
#################### # # # Exercise 9 # # # #################### library(Biostrings) DNA1 <-DNAString("ATAGTAGATGCGGCGCGCTAGAG") DNA2 <-DNAString("ATGTCGTCGTAGTCGTGTGTCAC") translatedAmino1<-translate(DNA1) translatedAmino2<-translate(DNA2) align<-pairwiseAlignment(translatedAmino1,translatedAmino2) print(align)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] IVDAAR* ## subject: [1] MSS*SCV ## score: -43.67328
Exercise 10
Create two RNA Strings and translate the same to amino acids and do pairwise alignment between the amino acid sequences
#################### # # # Exercise 10 # # # #################### library(Biostrings) RNA1 <-RNAString("AUAGUAGAUGCGGCGCGCUAGAG") RNA2 <-RNAString("AUGUCGUCGUAGUCGUGUGUCAC") translatedRNA1<-translate(RNA1) translatedRNA2<-translate(RNA2) align<-pairwiseAlignment(translatedRNA1,translatedRNA2) print(align)
## Global PairwiseAlignmentsSingleSubject (1 of 1) ## pattern: [1] IVDAAR* ## subject: [1] MSS*SCV ## score: -43.67328